{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Finetuning ruGPT3Xl example\n",
    "We test this pipeline on `torch 1.7.1+cu110, cuda 11.0, triton 1.0.0, deepspeed 0.7.4`, `A100 GPU` and it takes around `32GB GPU memory`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "r-XkQgf-ufgE"
   },
   "source": [
    "## Install env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "gH2kLmcXmL3e"
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "export LD_LIBRARY_PATH=/usr/lib/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "NRkBHiuOFYB-"
   },
   "outputs": [],
   "source": [
    "!apt-get install clang-9 llvm-9 llvm-9-dev llvm-9-tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "GfdFeAVylpIQ"
   },
   "outputs": [],
   "source": [
    "!rm -rf apex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "zFqiiOnRnh9p"
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "git clone https://github.com/qywu/apex\n",
    "cd apex\n",
    "pip install -v --no-cache-dir --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" ./"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7t-l-Gq6mGYg"
   },
   "outputs": [],
   "source": [
    "!pip install triton==1.0.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LEyrQ-spCdhR"
   },
   "outputs": [],
   "source": [
    "!DS_BUILD_CPU_ADAM=1 DS_BUILD_SPARSE_ATTN=1 pip install deepspeed==0.7.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "---7k9osCliv",
    "outputId": "db81d843-c418-47d7-a7cc-51ba54a9fcb5"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--------------------------------------------------\n",
      "DeepSpeed C++/CUDA extension op report\n",
      "--------------------------------------------------\n",
      "NOTE: Ops not installed will be just-in-time (JIT) compiled at\n",
      "      runtime if needed. Op compatibility means that your system\n",
      "      meet the required dependencies to JIT install the op.\n",
      "--------------------------------------------------\n",
      "JIT compiled ops requires ninja\n",
      "ninja .................. \u001b[92m[OKAY]\u001b[0m\n",
      "--------------------------------------------------\n",
      "op name ................ installed .. compatible\n",
      "--------------------------------------------------\n",
      "cpu_adam ............... \u001b[92m[YES]\u001b[0m ...... \u001b[92m[OKAY]\u001b[0m\n",
      "cpu_adagrad ............ \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "fused_adam ............. \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "fused_lamb ............. \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "sparse_attn ............ \u001b[92m[YES]\u001b[0m ...... \u001b[92m[OKAY]\u001b[0m\n",
      "transformer ............ \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "stochastic_transformer . \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "\u001b[93m [WARNING] \u001b[0m async_io requires the dev libaio .so object and headers but these were not found.\n",
      "\u001b[93m [WARNING] \u001b[0m async_io: please install the libaio-dev package with apt\n",
      "\u001b[93m [WARNING] \u001b[0m If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.\n",
      "async_io ............... \u001b[93m[NO]\u001b[0m ....... \u001b[93m[NO]\u001b[0m\n",
      "transformer_inference .. \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "utils .................. \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "quantizer .............. \u001b[93m[NO]\u001b[0m ....... \u001b[92m[OKAY]\u001b[0m\n",
      "--------------------------------------------------\n",
      "DeepSpeed general environment info:\n",
      "torch install path ............... ['/home/user/conda/lib/python3.7/site-packages/torch']\n",
      "torch version .................... 1.7.1+cu110\n",
      "torch cuda version ............... 11.0\n",
      "nvcc version ..................... 11.0\n",
      "deepspeed install path ........... ['/home/user/conda/lib/python3.7/site-packages/deepspeed']\n",
      "deepspeed info ................... 0.7.4, unknown, unknown\n",
      "deepspeed wheel compiled w. ...... torch 1.7, cuda 11.0\n"
     ]
    }
   ],
   "source": [
    "!ds_report"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rl1JIPsK8viz"
   },
   "source": [
    "Test, that deepspeed was installed correctly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "id": "DqPIkEYjCmLH"
   },
   "outputs": [],
   "source": [
    "import deepspeed.ops.sparse_attention.sparse_attn_op"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "gw4lJHfX4QnB"
   },
   "outputs": [],
   "source": [
    "!rm -rf ru-gpts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "t43yH5k1jtZZ"
   },
   "outputs": [],
   "source": [
    "!git clone  https://github.com/sberbank-ai/ru-gpts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "H2XiJvm_tQgL"
   },
   "outputs": [],
   "source": [
    "!pip install transformers==4.24.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HN9FJ3rKnO5Q"
   },
   "outputs": [],
   "source": [
    "!pip install timm==0.3.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "id": "1RCujWwJUwm-"
   },
   "outputs": [],
   "source": [
    "!cp ru-gpts/src_utils/trainer_pt_utils.py /usr/local/lib/python3.8/dist-packages/transformers/trainer_pt_utils.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "GHhCIrDQAAdJ"
   },
   "outputs": [],
   "source": [
    "!cp ru-gpts/src_utils/_amp_state.py /usr/local/lib/python3.8/dist-packages/apex/amp/_amp_state.py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Prepare data\n",
    "Download example data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!wget -O train.txt https://www.dropbox.com/s/oa3v9c7g9bp40xw/train.txt?dl=0\n",
    "!wget -O valid.txt https://www.dropbox.com/s/mworl3ld6r3bg62/valid.txt?dl=0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We should pass to our model list of text files as following:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing train.list\n"
     ]
    }
   ],
   "source": [
    "%%writefile train.list\n",
    "train.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing valid.list\n"
     ]
    }
   ],
   "source": [
    "%%writefile valid.list\n",
    "valid.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Start finetune\n",
    "We configure training script at `scripts/deepspeed_gpt3_xl_finetune.sh`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "```bash\n",
    "%%bash\n",
    "\n",
    "# Model parallel size\n",
    "MP_SIZE=1\n",
    "# Change for multinode config\n",
    "NUM_GPUS_PER_WORKER=1\n",
    "\n",
    "gpt_options=\" \\\n",
    "       --train-data-path examples/train.list \\\n",
    "       --test-data-path examples/valid.list \\\n",
    "       --load-huggingface sberbank-ai/rugpt3xl \\\n",
    "       --logging-dir=examples/log/ \\\n",
    "       --save examples/model \\\n",
    "       --save-interval 200 \\\n",
    "       --model-parallel-size ${MP_SIZE} \\\n",
    "       --num-layers 24 \\\n",
    "       --hidden-size 2048 \\\n",
    "       --num-attention-heads 16 \\\n",
    "       --batch-size 1 \\\n",
    "       --seq-length 2048 \\\n",
    "       --max-position-embeddings 2048 \\\n",
    "       --train-iters 1000 \\\n",
    "       --distributed-backend nccl \\\n",
    "       --lr 0.0002 \\\n",
    "       --lr-decay-style cosine \\\n",
    "       --weight-decay 1e-2 \\\n",
    "       --warmup .01 \\\n",
    "       --log-interval 50 \\\n",
    "       --fp16 \\\n",
    "       --checkpoint-activations \\\n",
    "       --deepspeed-activation-checkpointing \\\n",
    "       --sparse-mode alternating \\\n",
    "       --deepspeed \\\n",
    "       --deepspeed_config src/deepspeed_config/gpt3_xl_sparse_2048.json \\\n",
    "\"\n",
    "\n",
    "run_cmd=\"USE_DEEPSPEED=1 python -m torch.distributed.launch --nproc_per_node=${NUM_GPUS_PER_WORKER} pretrain_gpt3.py $@ ${gpt_options}\"\n",
    "echo \"${run_cmd}\"\n",
    "eval \"${run_cmd}\"\n",
    "\n",
    "set +x\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here\n",
    "* `train-data-path` - train data path\n",
    "* `test-data-path` - test data path\n",
    "* `load-huggingface` - model with we want to finetune\n",
    "\n",
    "and other <i>standart</i> tuning parameters.\n",
    "\n",
    "**Note!** `batch-size 1` because we use only 32 GB of GPU memory."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Run script from content of repo root!!!\n",
    "`sh scripts/deepspeed_gpt3_xl_finetune.sh`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After run training script we can see progress on tensorboard: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext tensorboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%tensorboard --logdir log/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "EnTy1SEajpPV"
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "Z-DSEz0ljpPV"
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"../../ru-gpts/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "_9GABoxNVpH4"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"USE_DEEPSPEED\"] = \"1\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "_HeCFnJEjpPV"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2022-12-06 21:35:45.765088: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0\n"
     ]
    }
   ],
   "source": [
    "from src.xl_wrapper import RuGPT3XL"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OdyughHDjpPV"
   },
   "source": [
    "Note! seq_len is max sequence length for generation used in generation process. Max avialable seq_len is 2048 (in tokens).\n",
    "Also inference takes around 10 Gb GPU memory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Ik1FP4Jcq8tg",
    "outputId": "bf562319-c269-4d47-e1b8-a1c47796717f"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "BKpw5ik2uWrw"
   },
   "outputs": [],
   "source": [
    "from src import mpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "id": "T0hDbxdvvUiO"
   },
   "outputs": [],
   "source": [
    "os.environ[\"MASTER_ADDR\"] = \"127.0.0.1\"\n",
    "os.environ[\"MASTER_PORT\"] = \"5000\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "56aNJNPYjpPW",
    "outputId": "3ac62539-8ce7-413f-ee72-396908d4f59a",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# gpt = RuGPT3XL.from_pretrained(\"sberbank-ai/rugpt3xl\", seq_len=512)\n",
    "gpt = RuGPT3XL.from_pretrained(\n",
    "    \"sberbank-ai/rugpt3xl\",\n",
    "    weights_path=\"model/400/mp_rank_00_model_states.pt\",\n",
    "    seq_len=2048\n",
    ")\n",
    "# model parallel group is not initialized - если не подключена gpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_resuls(nr):\n",
    "    return [x[:x.find(\"<|endoftext|>\")] for x in nr]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = gpt.generate(\n",
    "    \"<s>Тема: «Создает человека природа, но развивает и образует его общество». (В.Т. Белинский)\\nСочинение: \",\n",
    "    max_length=512,\n",
    "    repetition_penalty=5.0, do_sample=True, top_k=5, top_p=0.95, temperature=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'<s>Тема: «Создает человека природа, но развивает и образует его общество». (В.Т. Белинский)\\nСочинение: \\xa0\\xa0\\xa0 | - Человек это высшая ступень развития живых организмов на земле, субъект общественно-исторической деятельностии культуры; — человек прочно связан с обществом как со своим главным агентом социализации детей раннего возраста. Согласно антропосоциогенеза структура человеческой личности определяется четырьмя основными составляющими : соматологической(составление карты больного), психической («мысль изреченная есть закон законов»), социальной активности поведения ребенка в момент рождения или вскоре после него.(А какие же качества лично вам кажутся самыми важными? Думается мне что автор хотел указать значение игровой игры для формирования здорового молодого организма.) И я полностью разделяю мнение известного литератора В.-Г..Белинского о том,,что игра является основным фактором обновления основных жизненных сил человеческого тела... Действительно ли так бывает? У каждого из нас свои'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res[0][:1000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "name": "ruGPT3XL_generation.ipynb",
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
